### Before you can run these examples, you will need to download and install...
#...the packages below. These also require the installation of a C++ toolchain.
# See this webpage for installation instructions: 
# https://github.com/rmcelreath/rethinking#installation 
library(StanHeaders)
library(cmdstanr)
library(rstan)
library(ggplot2)
library(rethinking)

# Load the data provided. Edit the path below so that it points to the
# location of the CSV file on your own machine.
d <- read.csv(file = "D:/Teaching/Bayes Reg/data_birthweight.csv")
# Quick look at the first rows and at a per-column summary
head(d)
summary(d)

### Collect only the two variables of interest (mw and bw) in a plain list
### to hand to the model. There are no missing data in those variables.
dl <- list(mw = d[["mw"]], bw = d[["bw"]])


### Fit the model with the "ulam" function
# Fix the seed so that the output can be replicated
set.seed(12593)
m1 <- ulam(
  alist(
    # Likelihood: birth weight is normal around mu with spread sigma
    bw ~ dnorm(mu, sigma),
    # Linear model for mu; mw is shifted by 72.1
    # (presumably the sample mean of mw -- confirm against the data)
    mu <- a + b * (mw - 72.1),
    # Priors
    a ~ dnorm(3300, 600),
    b ~ dnorm(0, 25),
    sigma ~ dunif(0, 1000)
  ),
  data = dl,
  chains = 6,
  cores = 6
)

# Inspect the fitted model object
show(m1)
# Tabular summary of the results, together with some key diagnostics
precis(m1)

# Further diagnostics for the sampling algorithm. See the "Statistical
# Rethinking" book, particularly Chapter 9, for more information.
traceplot(m1)
trankplot(m1)

# Variances and covariances of the model parameters, rounded to 3 decimals...
round(vcov(m1), 3)
# ...which can also be inspected graphically
pairs(m1)

# Go back to a single graph per plotting page. par() returns the settings it
# replaces, which are kept in old.par.
old.par <- par(mfrow = c(1, 1))



#### Create plots from the posterior ####

# Fix the seed in case you want to replicate this draw
set.seed(12593)
# Pull the samples out of the posterior distribution with the "rethinking"
# function "extract.samples"
post <- extract.samples(m1)

# Average the sampled "a" and "b" parameters
a_map <- mean(post$a)
b_map <- mean(post$b)

# Scatter plot of the observed data
plot(
  bw ~ mw,
  data = d,
  pch = 19,
  cex = 1.5,
  col = "firebrick2",
  xlab = "Maternal Weight (Kg)",
  ylab = "Newborn Birth Weight (g)",
  cex.lab = 1.5
)

# Overlay the line of predicted values implied by the average "a" and "b"
# computed above, using the same 72.1 shift of the predictor as the model
curve(a_map + b_map * (x - 72.1), add = TRUE, lwd = 6, col = "firebrick4")

#### Add information about uncertainty around the central values ####

# Define a sequence of maternal weights to compute predictions for:
# 35 to 120 Kg, increasing by 1 Kg.
mw.seq <- seq(from = 35, to = 120, by = 1)

# Use the "link" function from the "rethinking" package to compute mu for
# each sample from the posterior as a function of each maternal weight in
# the sequence created above.
set.seed(12593)
mu <- link(m1, data = data.frame(mw = mw.seq))

# Summarise the distribution of mu column by column (one column per value in
# mw.seq): its mean and its 89% interval.
mu.mean <- apply(mu, 2, mean)
mu.ci <- apply(mu, 2, PI, prob = 0.89)

# Plot the observed data. Calling plot() starts a fresh figure, so the
# previous plot does not need to be cleared by hand.
plot(bw ~ mw, data = d, pch = 19, col = "firebrick2",
     xlab = "Maternal Weight (Kg)", ylab = "Newborn Birth Weight (g)",
     cex.lab = 1.5, cex = 1.5)

# Draw the shaded area for the 89% interval of mu first, so that it does not
# paint over the mean line added next.
shade(mu.ci, mw.seq, col = col.alpha("peachpuff", .75))

# Draw the mean of mu for each maternal weight. lines() always adds to the
# current plot and has no "add" argument, so none is passed.
lines(mw.seq, mu.mean, lwd = 3, col = "firebrick4")



### Add 89% prediction interval for actual birth weights ###
# This incorporates the SD and its uncertainty, i.e. the spread (sigma)
# around mu. The "rethinking" function "sim" simulates birth weights from the
# normal distribution with parameter mu corresponding to a given maternal
# weight value, while considering the SD around mu, sigma, according to the
# posterior.

# sim() draws random values, so fix the seed (as done for the other
# stochastic steps in this script) to make the interval reproducible.
set.seed(12593)
sim.bw <- sim(m1, data = list(mw = mw.seq))

# sim.bw is a matrix of simulated birth weights, created for each value of
# the predictor maternal weight in the sequence.
str(sim.bw)

# Summarise the simulated birth weights column by column: bw.ci holds the
# 89% posterior prediction interval of observable birth weights across the
# values of maternal weight on the X axis.
bw.ci <- apply(sim.bw, 2, PI, prob = .89)

# Plot the observed data, then layer the intervals from widest to narrowest
# and finish with the mean line so that nothing is hidden.
plot(bw ~ mw, data = d, pch = 19, col = "#D73529",
     xlab = "Maternal Weight (Kg)", ylab = "Newborn Birth Weight (g)",
     cex.lab = 1.5, cex = 1.5)
shade(bw.ci, mw.seq, col = col.alpha("#1C5A99", .45))
shade(mu.ci, mw.seq, col = col.alpha("#F78E80", .75))
# lines() always adds to the current plot and has no "add" argument, so none
# is passed.
lines(mw.seq, mu.mean, lwd = 3, col = "#26456E")
